This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# library for LMM
library(lme4)
Loading required package: Matrix
library(lmerTest)
Attaching package: ‘lmerTest’
The following object is masked from ‘package:lme4’:
lmer
The following object is masked from ‘package:stats’:
step
library(car)
Loading required package: carData
df<-read.csv("input/scores_commits.csv", header =TRUE, sep=",")
df <- df[complete.cases(df), ] # Apply complete.cases function
df
# convert to nominal factor
df$Group = factor(df$Group)
df$phase = factor(df$phase)
df$log_novelty <- log(df$novelty+1)
df$log_user_requirement <- log(df$user.requirement+1)
df$log_infovis <- log(df$infovis+1)
df$log_total <- log(df$total+1)
df$log_count <- log(df$count+1)
df$Q7_Q7_1 <- log(df$Q7_Q7_1+1)
df$Q7_Q7_2 <- log(df$Q7_Q7_2+1)
df$Q8_Q8_1 <- log(df$Q8_Q8_1+1)
df$Q10 <- log(df$Q10+1)
# standardizing variables for skills and aspirations.
cols <- c("Q7_Q7_1", "Q7_Q7_2", "Q8_Q8_1", "Q10", "log_novelty", "log_user_requirement", "log_infovis", "log_total", "log_count")
df[cols] <- scale(df[cols])
df
mod.reduce.novelty <- lmer( log_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | phase ), data = df, REML = FALSE)
summary(mod.reduce.novelty)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2103.8 2141.3 -1043.9 2087.8 792
Scaled residuals:
Min 1Q Median 3Q Max
-2.2633 -0.8860 0.1105 0.8440 2.1308
Random effects:
Groups Name Variance Std.Dev.
phase (Intercept) 0.07241 0.2691
Residual 0.78242 0.8845
Number of obs: 800, groups: phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -9.165e-15 1.243e-01 4.992e+00 0.000 1.00000
log_count 3.257e-01 3.179e-02 7.962e+02 10.244 < 2e-16 ***
Q7_Q7_1 -1.935e-01 3.929e-02 7.950e+02 -4.924 1.03e-06 ***
Q7_Q7_2 1.787e-01 4.027e-02 7.950e+02 4.439 1.03e-05 ***
Q8_Q8_1 1.818e-03 3.459e-02 7.950e+02 0.053 0.95811
Q10 1.014e-01 3.375e-02 7.950e+02 3.004 0.00275 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
log_count 0.000
Q7_Q7_1 0.000 0.092
Q7_Q7_2 0.000 -0.050 -0.556
Q8_Q8_1 0.000 -0.062 -0.085 -0.167
Q10 0.000 -0.066 -0.021 -0.120 -0.273
AIC(mod.reduce.novelty)
[1] 2103.817
BIC(mod.reduce.novelty)
[1] 2141.294
mod.full.novelty <- lmer( log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase), data = df, REML = FALSE)
summary(mod.full.novelty)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2102.5 2154.0 -1040.2 2080.5 789
Scaled residuals:
Min 1Q Median 3Q Max
-2.3873 -0.9114 0.1088 0.8567 2.1940
Random effects:
Groups Name Variance Std.Dev.
phase (Intercept) 0.07219 0.2687
Residual 0.77526 0.8805
Number of obs: 800, groups: phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.127049 0.136604 7.320729 -0.930 0.38199
factor(Group)1 0.232085 0.090159 794.997658 2.574 0.01023 *
factor(Group)2 0.167314 0.090050 794.992569 1.858 0.06354 .
factor(Group)3 0.096403 0.089581 794.994619 1.076 0.28219
log_count 0.321371 0.031722 796.231055 10.131 < 2e-16 ***
Q7_Q7_1 -0.195842 0.039288 795.004690 -4.985 7.62e-07 ***
Q7_Q7_2 0.176265 0.040328 794.996130 4.371 1.40e-05 ***
Q8_Q8_1 -0.000345 0.034500 794.997224 -0.010 0.99202
Q10 0.097272 0.034151 794.996547 2.848 0.00451 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) fc(G)1 fc(G)2 fc(G)3 lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
factr(Grp)1 -0.344
factr(Grp)2 -0.344 0.512
factr(Grp)3 -0.346 0.526 0.525
log_count 0.019 -0.063 -0.011 -0.041
Q7_Q7_1 0.002 -0.046 0.046 -0.012 0.096
Q7_Q7_2 0.015 0.008 -0.088 -0.008 -0.053 -0.560
Q8_Q8_1 0.022 -0.022 -0.050 -0.056 -0.060 -0.086 -0.162
Q10 -0.005 -0.082 0.095 0.018 -0.056 -0.004 -0.135 -0.274
AIC(mod.full.novelty)
[1] 2102.49
BIC(mod.full.novelty)
[1] 2154.021
anova(mod.full.novelty)
Type III Analysis of Variance Table with Satterthwaite's method
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
factor(Group) 5.706 1.902 3 794.99 2.4534 0.062061 .
log_count 79.569 79.569 1 796.23 102.6344 < 2.2e-16 ***
Q7_Q7_1 19.264 19.264 1 795.00 24.8486 7.615e-07 ***
Q7_Q7_2 14.810 14.810 1 795.00 19.1037 1.402e-05 ***
Q8_Q8_1 0.000 0.000 1 795.00 0.0001 0.992024
Q10 6.289 6.289 1 795.00 8.1126 0.004509 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(mod.reduce.novelty, mod.full.novelty)
Data: df
Models:
mod.reduce.novelty: log_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
mod.full.novelty: log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
mod.reduce.novelty 8 2103.8 2141.3 -1043.9 2087.8
mod.full.novelty 11 2102.5 2154.0 -1040.2 2080.5 7.3266 3 0.06219 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
Loading required package: leaps
Loading required package: SuppDists
step(lm(log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data=df),
method="both", trace = 1 )
Start: AIC=-115.17
log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10
Df Sum of Sq RSS AIC
- Q8_Q8_1 1 0.002 677.32 -117.172
<none> 677.32 -115.173
- factor(Group) 3 5.900 683.22 -114.235
- Q10 1 6.515 683.84 -109.515
- Q7_Q7_2 1 15.138 692.46 -99.490
- Q7_Q7_1 1 19.947 697.27 -93.954
- log_count 1 66.787 744.11 -41.940
Step: AIC=-117.17
log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q10
Df Sum of Sq RSS AIC
<none> 677.32 -117.172
- factor(Group) 3 5.908 683.23 -116.224
- Q10 1 7.106 684.43 -110.823
- Q7_Q7_2 1 15.596 692.92 -100.960
- Q7_Q7_1 1 20.066 697.39 -95.815
- log_count 1 67.067 744.39 -43.638
Call:
lm(formula = log_novelty ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q10, data = df)
Coefficients:
(Intercept) factor(Group)1 factor(Group)2 factor(Group)3 log_count Q7_Q7_1 Q7_Q7_2
-0.12958 0.23732 0.16843 0.09995 0.29271 -0.19912 0.17849
Q10
0.09942
mod.reduce.ur <- lm(log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(mod.reduce.ur)
Call:
lm(formula = log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.9234 -0.9854 0.3754 0.7441 1.6632
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.509e-16 3.348e-02 0.000 1.000000
log_count 2.476e-01 3.383e-02 7.319 6.11e-13 ***
Q7_Q7_1 -1.897e-01 4.207e-02 -4.509 7.51e-06 ***
Q7_Q7_2 1.157e-01 4.311e-02 2.685 0.007412 **
Q8_Q8_1 -4.640e-03 3.703e-02 -0.125 0.900327
Q10 1.194e-01 3.613e-02 3.305 0.000993 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9471 on 794 degrees of freedom
Multiple R-squared: 0.1086, Adjusted R-squared: 0.103
F-statistic: 19.36 on 5 and 794 DF, p-value: < 2.2e-16
AIC(mod.reduce.ur)
[1] 2191.288
BIC(mod.reduce.ur)
[1] 2224.081
mod.full.ur <- lm(log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
summary(mod.full.ur)
Call:
lm(formula = log_user_requirement ~ factor(Group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.0497 -0.9697 0.3109 0.7168 1.7587
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.20771 0.06930 -2.997 0.002809 **
factor(Group)1 0.30843 0.09616 3.207 0.001393 **
factor(Group)2 0.15234 0.09604 1.586 0.113097
factor(Group)3 0.34981 0.09554 3.661 0.000268 ***
log_count 0.23922 0.03363 7.114 2.53e-12 ***
Q7_Q7_1 -0.19761 0.04190 -4.716 2.84e-06 ***
Q7_Q7_2 0.11979 0.04301 2.785 0.005482 **
Q8_Q8_1 -0.01073 0.03680 -0.292 0.770735
Q10 0.11094 0.03642 3.046 0.002398 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9391 on 791 degrees of freedom
Multiple R-squared: 0.1269, Adjusted R-squared: 0.1181
F-statistic: 14.37 on 8 and 791 DF, p-value: < 2.2e-16
AIC(mod.full.ur)
[1] 2180.715
BIC(mod.full.ur)
[1] 2227.561
anova(mod.reduce.ur, mod.full.ur)
Analysis of Variance Table
Model 1: log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Model 2: log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 794 712.19
2 791 697.59 3 14.602 5.5192 0.0009401 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
step(lm(log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data=df),
method="both", trace = 1 )
Start: AIC=-91.59
log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Df Sum of Sq RSS AIC
- Q8_Q8_1 1 0.075 697.66 -93.500
<none> 697.59 -91.586
- Q7_Q7_2 1 6.840 704.43 -85.780
- Q10 1 8.181 705.77 -84.258
- factor(Group) 3 14.602 712.19 -81.013
- Q7_Q7_1 1 19.617 717.20 -71.400
- log_count 1 44.633 742.22 -43.971
Step: AIC=-93.5
log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q10
Df Sum of Sq RSS AIC
<none> 697.66 -93.500
- Q7_Q7_2 1 6.788 704.45 -87.755
- Q10 1 8.389 706.05 -85.938
- factor(Group) 3 14.541 712.20 -82.997
- Q7_Q7_1 1 19.975 717.64 -72.917
- log_count 1 44.575 742.24 -45.954
Call:
lm(formula = log_user_requirement ~ factor(Group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q10, data = df)
Coefficients:
(Intercept) factor(Group)1 factor(Group)2 factor(Group)3 log_count Q7_Q7_1 Q7_Q7_2
-0.2068 0.3078 0.1509 0.3482 0.2386 -0.1987 0.1178
Q10
0.1080
mod.reduce.vis <- lmer( log_infovis ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | phase) , data = df, REML = FALSE)
summary(mod.reduce.vis)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_infovis ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2124.2 2161.7 -1054.1 2108.2 792
Scaled residuals:
Min 1Q Median 3Q Max
-3.2613 -0.1514 0.3040 0.6139 1.7276
Random effects:
Groups Name Variance Std.Dev.
phase (Intercept) 0.04898 0.2213
Residual 0.80459 0.8970
Number of obs: 800, groups: phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -3.387e-15 1.039e-01 4.990e+00 0.000 1.00000
log_count 3.267e-01 3.223e-02 7.968e+02 10.137 < 2e-16 ***
Q7_Q7_1 -1.748e-01 3.984e-02 7.950e+02 -4.386 1.31e-05 ***
Q7_Q7_2 2.028e-01 4.083e-02 7.950e+02 4.965 8.39e-07 ***
Q8_Q8_1 -7.557e-02 3.507e-02 7.950e+02 -2.155 0.03149 *
Q10 1.007e-01 3.422e-02 7.950e+02 2.944 0.00334 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
log_count 0.000
Q7_Q7_1 0.000 0.092
Q7_Q7_2 0.000 -0.050 -0.556
Q8_Q8_1 0.000 -0.062 -0.085 -0.167
Q10 0.000 -0.066 -0.021 -0.120 -0.273
AIC(mod.reduce.vis)
[1] 2124.233
BIC(mod.reduce.vis)
[1] 2161.709
mod.full.vis <- lmer( log_infovis ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | phase) , data = df, REML = FALSE)
summary(mod.full.vis)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_infovis ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2107.2 2158.8 -1042.6 2085.2 789
Scaled residuals:
Min 1Q Median 3Q Max
-3.4115 -0.1553 0.3094 0.6174 1.6554
Random effects:
Groups Name Variance Std.Dev.
phase (Intercept) 0.04873 0.2207
Residual 0.78168 0.8841
Number of obs: 800, groups: phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.27038 0.11833 8.50468 -2.285 0.049801 *
factor(Group)1 0.36811 0.09053 794.99787 4.066 5.26e-05 ***
factor(Group)2 0.30044 0.09042 794.99050 3.323 0.000932 ***
factor(Group)3 0.38658 0.08995 794.99347 4.298 1.94e-05 ***
log_count 0.31844 0.03185 796.72863 9.999 < 2e-16 ***
Q7_Q7_1 -0.17878 0.03945 795.00805 -4.532 6.74e-06 ***
Q7_Q7_2 0.19995 0.04049 794.99566 4.938 9.64e-07 ***
Q8_Q8_1 -0.08388 0.03464 794.99724 -2.421 0.015682 *
Q10 0.09844 0.03429 794.99626 2.871 0.004207 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) fc(G)1 fc(G)2 fc(G)3 lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
factr(Grp)1 -0.399
factr(Grp)2 -0.398 0.512
factr(Grp)3 -0.401 0.526 0.525
log_count 0.022 -0.063 -0.011 -0.041
Q7_Q7_1 0.002 -0.046 0.046 -0.012 0.096
Q7_Q7_2 0.017 0.008 -0.088 -0.008 -0.053 -0.560
Q8_Q8_1 0.025 -0.022 -0.050 -0.056 -0.060 -0.086 -0.162
Q10 -0.006 -0.082 0.095 0.018 -0.056 -0.004 -0.135 -0.274
AIC(mod.full.vis)
[1] 2107.234
BIC(mod.full.vis)
[1] 2158.764
anova(mod.reduce.vis, mod.full.vis)
Data: df
Models:
mod.reduce.vis: log_infovis ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
mod.full.vis: log_infovis ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
mod.reduce.vis 8 2124.2 2161.7 -1054.1 2108.2
mod.full.vis 11 2107.2 2158.8 -1042.6 2085.2 22.999 3 4.04e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
step(lm(log_infovis ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data=df),
method="both", trace = 1 )
Start: AIC=-131.12
log_infovis ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Df Sum of Sq RSS AIC
<none> 663.96 -131.117
- Q8_Q8_1 1 4.428 668.38 -127.799
- Q10 1 6.615 670.57 -125.186
- Group 3 18.523 682.48 -115.104
- Q7_Q7_1 1 16.528 680.48 -113.446
- Q7_Q7_2 1 19.340 683.30 -110.147
- log_count 1 68.596 732.55 -54.463
Call:
lm(formula = log_infovis ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Coefficients:
(Intercept) Group1 Group2 Group3 log_count Q7_Q7_1 Q7_Q7_2 Q8_Q8_1
-0.27221 0.37203 0.30113 0.38910 0.29656 -0.18139 0.20142 -0.08245
Q10
0.09975
mod.reduce.total <- lm( log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(mod.reduce.total)
Call:
lm(formula = log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1861 -0.1993 0.2443 0.5703 1.4738
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.422e-16 3.222e-02 0.000 1.000000
log_count 3.419e-01 3.255e-02 10.503 < 2e-16 ***
Q7_Q7_1 -1.852e-01 4.048e-02 -4.576 5.51e-06 ***
Q7_Q7_2 1.875e-01 4.148e-02 4.520 7.12e-06 ***
Q8_Q8_1 -8.656e-02 3.563e-02 -2.429 0.015349 *
Q10 1.241e-01 3.477e-02 3.570 0.000378 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9113 on 794 degrees of freedom
Multiple R-squared: 0.1748, Adjusted R-squared: 0.1696
F-statistic: 33.63 on 5 and 794 DF, p-value: < 2.2e-16
AIC(mod.reduce.total)
[1] 2129.645
BIC(mod.reduce.total)
[1] 2162.437
mod.full.total <- lm( log_total ~ Group + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(mod.full.total)
Call:
lm(formula = log_total ~ Group + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.9973 -0.1279 0.2773 0.5482 1.4041
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.29834 0.07061 -4.225 2.66e-05 ***
Group1 0.43251 0.09786 4.420 1.13e-05 ***
Group2 0.28093 0.09793 2.869 0.004231 **
Group3 0.45080 0.09734 4.631 4.25e-06 ***
Q7_Q7_1 -0.23066 0.04253 -5.424 7.76e-08 ***
Q7_Q7_2 0.20946 0.04380 4.783 2.06e-06 ***
Q8_Q8_1 -0.07307 0.03745 -1.951 0.051416 .
Q10 0.13970 0.03708 3.767 0.000177 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9576 on 792 degrees of freedom
Multiple R-squared: 0.09105, Adjusted R-squared: 0.08302
F-statistic: 11.33 on 7 and 792 DF, p-value: 9.643e-14
AIC(mod.full.total)
[1] 2210.925
BIC(mod.full.total)
[1] 2253.087
anova(mod.reduce.total, mod.full.total)
Analysis of Variance Table
Model 1: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10
Model 2: log_total ~ Group + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 794 659.37
2 792 726.25 2 -66.874
# convert to nominal factor
df$Group = factor(df$Group)
df$phase = factor(df$phase)
library(plyr)
ddply(df, ~ Group * phase, function(data) summary(data$log_novelty) )
ddply(df, ~ Group * phase, summarise, log_novelty.mean=mean(log_novelty), log_novelty.sd = sd(log_novelty))
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_novelty")
with(df, interaction.plot(Group, phase, log_novelty, ylim=c(0, max(log_novelty)))) # interaction plot
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_user_requirement")
with(df, interaction.plot(Group, phase, log_user_requirement, ylim=c(0, max(log_user_requirement)))) # interaction plot
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_infovis")
with(df, interaction.plot(Group, phase, log_infovis, ylim=c(0, max(log_infovis)))) # interaction plot
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_total")
with(df, interaction.plot(Group, phase, log_total, ylim=c(0, max(log_total)))) # interaction plot
m = lmer(log_novelty ~ Group + (1|Student), data=df, REML=FALSE)
summary(m)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_novelty ~ Group + (1 | Student)
Data: df
AIC BIC logLik deviance df.resid
2046.0 2074.1 -1017.0 2034.0 794
Scaled residuals:
Min 1Q Median 3Q Max
-2.2669 -0.6407 0.1015 0.6356 2.3143
Random effects:
Groups Name Variance Std.Dev.
Student (Intercept) 0.4473 0.6688
Residual 0.5366 0.7325
Number of obs: 800, groups: Student, 159
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.1667 0.1224 159.4644 -1.361 0.175
Group1 0.2933 0.1698 159.0131 1.728 0.086 .
Group2 0.1952 0.1689 159.4644 1.156 0.249
Group3 0.1401 0.1689 159.4644 0.829 0.408
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) Group1 Group2
Group1 -0.721
Group2 -0.725 0.523
Group3 -0.725 0.523 0.526
plot(resid(m, type = "pearson") ~ fitted(m))
qqnorm(resid(m, type = "pearson"))
qqline(resid(m, type = "pearson"))
# library for LMM we will use on relational log_novelty
library(lme4)
library(lmerTest)
library(car)
contrasts(df$Group) <= "contr.sum"
1 2 3
0 TRUE TRUE TRUE
1 TRUE TRUE TRUE
2 TRUE TRUE TRUE
3 TRUE TRUE TRUE
contrasts(df$phase) <= "contr.sum"
2 3 4 5
1 TRUE TRUE TRUE TRUE
2 TRUE TRUE TRUE TRUE
3 TRUE TRUE TRUE TRUE
4 TRUE TRUE TRUE TRUE
5 TRUE TRUE TRUE TRUE
# phase is nested within group
fit <- lm(log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(fit)
Call:
lm(formula = log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1861 -0.1993 0.2443 0.5703 1.4738
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.422e-16 3.222e-02 0.000 1.000000
log_count 3.419e-01 3.255e-02 10.503 < 2e-16 ***
Q7_Q7_1 -1.852e-01 4.048e-02 -4.576 5.51e-06 ***
Q7_Q7_2 1.875e-01 4.148e-02 4.520 7.12e-06 ***
Q8_Q8_1 -8.656e-02 3.563e-02 -2.429 0.015349 *
Q10 1.241e-01 3.477e-02 3.570 0.000378 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9113 on 794 degrees of freedom
Multiple R-squared: 0.1748, Adjusted R-squared: 0.1696
F-statistic: 33.63 on 5 and 794 DF, p-value: < 2.2e-16
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Loading required package: TH.data
Loading required package: MASS
Attaching package: ‘TH.data’
The following object is masked from ‘package:MASS’:
geyser
library(lsmeans)
Loading required package: emmeans
The 'lsmeans' package is now basically a front end for 'emmeans'.
Users are encouraged to switch the rest of the way.
See help('transition') for more information, including how to
convert old 'lsmeans' objects and scripts to work with 'emmeans'.
#summary(glht(fit, lsm(pairwise ~ roup / phase)), test = adjusted(type='holm'))
fit.full <- lm(log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
summary(fit.full)
Call:
lm(formula = log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1191 -0.2201 0.2313 0.5645 1.3650
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.27052 0.06637 -4.076 5.04e-05 ***
Group1 0.37292 0.09209 4.050 5.64e-05 ***
Group2 0.27040 0.09198 2.940 0.003380 **
Group3 0.41236 0.09150 4.507 7.58e-06 ***
log_count 0.33294 0.03220 10.339 < 2e-16 ***
Q7_Q7_1 -0.19094 0.04013 -4.758 2.32e-06 ***
Q7_Q7_2 0.18702 0.04119 4.540 6.49e-06 ***
Q8_Q8_1 -0.09491 0.03524 -2.693 0.007223 **
Q10 0.11965 0.03488 3.430 0.000635 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8994 on 791 degrees of freedom
Multiple R-squared: 0.1993, Adjusted R-squared: 0.1912
F-statistic: 24.61 on 8 and 791 DF, p-value: < 2.2e-16
anova(fit, fit.full)
Analysis of Variance Table
Model 1: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10
Model 2: log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 794 659.37
2 791 639.79 3 19.585 8.0715 2.664e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# histograms for two factors
boxplot(log_total ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_total")
with(df, interaction.plot(Group, phase, log_total, ylim=c(0, max(log_total)))) # interaction plot
fit.lmer <- lmer(log_total ~ (1 | Group) + (1| phase:Group), data = df, REML= FALSE)
summary(fit.lmer)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_total ~ (1 | Group) + (1 | phase:Group)
Data: df
AIC BIC logLik deviance df.resid
2254.0 2272.7 -1123.0 2246.0 796
Scaled residuals:
Min 1Q Median 3Q Max
-2.94437 -0.01942 0.28792 0.61016 1.43336
Random effects:
Groups Name Variance Std.Dev.
phase:Group (Intercept) 0.03675 0.1917
Group (Intercept) 0.02060 0.1435
Residual 0.94281 0.9710
Number of obs: 800, groups: phase:Group, 20; Group, 4
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.006239 0.090377 3.950735 -0.069 0.948
library(lmerTest)
fit.lmer <- lmer(log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | phase), data = df, REML= FALSE)
summary(fit.lmer)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2071.8 2123.3 -1024.9 2049.8 789
Scaled residuals:
Min 1Q Median 3Q Max
-3.7050 -0.2260 0.2540 0.5958 1.8126
Random effects:
Groups Name Variance Std.Dev.
phase (Intercept) 0.05292 0.2300
Residual 0.74729 0.8645
Number of obs: 800, groups: phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.26867 0.12105 8.07626 -2.219 0.056926 .
Group1 0.36895 0.08852 794.99855 4.168 3.41e-05 ***
Group2 0.26970 0.08841 794.99199 3.051 0.002360 **
Group3 0.40980 0.08795 794.99463 4.659 3.72e-06 ***
log_count 0.35508 0.03114 796.55633 11.402 < 2e-16 ***
Q7_Q7_1 -0.18830 0.03857 795.00761 -4.882 1.27e-06 ***
Q7_Q7_2 0.18553 0.03959 794.99658 4.686 3.28e-06 ***
Q8_Q8_1 -0.09636 0.03387 794.99799 -2.845 0.004556 **
Q10 0.11832 0.03353 794.99712 3.529 0.000441 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) Group1 Group2 Group3 lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
Group1 -0.381
Group2 -0.381 0.512
Group3 -0.383 0.526 0.525
log_count 0.021 -0.063 -0.011 -0.041
Q7_Q7_1 0.002 -0.046 0.046 -0.012 0.096
Q7_Q7_2 0.017 0.008 -0.088 -0.008 -0.053 -0.560
Q8_Q8_1 0.024 -0.022 -0.050 -0.056 -0.060 -0.086 -0.162
Q10 -0.006 -0.082 0.095 0.018 -0.056 -0.004 -0.135 -0.274
anova(fit.lmer)
Type III Analysis of Variance Table with Satterthwaite's method
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
Group 19.278 6.426 3 794.99 8.5989 1.272e-05 ***
log_count 97.157 97.157 1 796.56 130.0129 < 2.2e-16 ***
Q7_Q7_1 17.809 17.809 1 795.01 23.8318 1.271e-06 ***
Q7_Q7_2 16.407 16.407 1 795.00 21.9558 3.281e-06 ***
Q8_Q8_1 6.048 6.048 1 795.00 8.0936 0.0045564 **
Q10 9.306 9.306 1 795.00 12.4524 0.0004414 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(multcomp)
summary(glht(fit.lmer, linfct = mcp(Group = "Tukey")), test = adjusted("holm"))
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: lmer(formula = log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10 + (1 | phase), data = df, REML = FALSE)
Linear Hypotheses:
Estimate Std. Error z value Pr(>|z|)
1 - 0 == 0 0.36895 0.08852 4.168 0.000154 ***
2 - 0 == 0 0.26970 0.08841 3.051 0.009136 **
3 - 0 == 0 0.40980 0.08795 4.659 1.9e-05 ***
2 - 1 == 0 -0.09925 0.08739 -1.136 0.512120
3 - 1 == 0 0.04085 0.08594 0.475 0.634555
3 - 2 == 0 0.14010 0.08597 1.630 0.309475
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- holm method)
fit.lmer.reduced <- lmer(log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | phase), data = df, REML=FALSE)
summary(fit.lmer.reduced)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2091.2 2128.7 -1037.6 2075.2 792
Scaled residuals:
Min 1Q Median 3Q Max
-3.4850 -0.2298 0.2612 0.5992 1.8832
Random effects:
Groups Name Variance Std.Dev.
phase (Intercept) 0.05319 0.2306
Residual 0.77150 0.8784
Number of obs: 800, groups: phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -8.153e-16 1.077e-01 4.992e+00 0.000 1.000000
log_count 3.640e-01 3.156e-02 7.966e+02 11.532 < 2e-16 ***
Q7_Q7_1 -1.827e-01 3.902e-02 7.950e+02 -4.683 3.33e-06 ***
Q7_Q7_2 1.861e-01 3.998e-02 7.950e+02 4.655 3.80e-06 ***
Q8_Q8_1 -8.804e-02 3.435e-02 7.950e+02 -2.563 0.010547 *
Q10 1.226e-01 3.351e-02 7.950e+02 3.658 0.000271 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
log_count 0.000
Q7_Q7_1 0.000 0.092
Q7_Q7_2 0.000 -0.050 -0.556
Q8_Q8_1 0.000 -0.062 -0.085 -0.167
Q10 0.000 -0.066 -0.021 -0.120 -0.273
anova(fit.lmer.reduced, fit.lmer)
Data: df
Models:
fit.lmer.reduced: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
fit.lmer: log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
fit.lmer.reduced 8 2091.2 2128.7 -1037.6 2075.2
fit.lmer 11 2071.8 2123.3 -1024.9 2049.8 25.388 3 1.281e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# phase is nested within group
fit.requirement.full <- lmer(log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | phase), data = df, REML = FALSE)
Anova(fit.requirement.full, type=3, test.statistics="F")
Analysis of Deviance Table (Type III Wald chisquare tests)
Response: log_user_requirement
Chisq Df Pr(>Chisq)
(Intercept) 2.5959 1 0.1071398
factor(Group) 17.5649 3 0.0005407 ***
log_count 65.8539 1 4.856e-16 ***
Q7_Q7_1 23.4133 1 1.307e-06 ***
Q7_Q7_2 8.1810 1 0.0042331 **
Q8_Q8_1 0.1217 1 0.7271747
Q10 9.7952 1 0.0017497 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
fit.requirement.full
Linear mixed model fit by maximum likelihood ['lmerModLmerTest']
Formula: log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2139.609 2191.140 -1058.805 2117.609 789
Random effects:
Groups Name Std.Dev.
phase (Intercept) 0.2436
Residual 0.9018
Number of obs: 800, groups: phase, 5
Fixed Effects:
(Intercept) factor(Group)1 factor(Group)2 factor(Group)3 log_count Q7_Q7_1 Q7_Q7_2
-0.20567 0.30406 0.15157 0.34699 0.26363 -0.19470 0.11814
Q8_Q8_1 Q10
-0.01233 0.10947
# histograms for two factors
boxplot(log_user_requirement ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_user_requirement")
with(df, interaction.plot(Group, phase, log_user_requirement, ylim=c(0, max(log_user_requirement)))) # interaction plot
# phase is nested within group
fit.requirement <- lmer(log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | phase), data = df, REML = FALSE)
Anova(fit, type=3, test.statistics="F")
Anova Table (Type III tests)
Response: log_total
Sum Sq Df F value Pr(>F)
(Intercept) 0.00 1 0.0000 1.000000
log_count 91.62 1 110.3216 < 2.2e-16 ***
Q7_Q7_1 17.39 1 20.9366 5.507e-06 ***
Q7_Q7_2 16.97 1 20.4327 7.117e-06 ***
Q8_Q8_1 4.90 1 5.9015 0.015349 *
Q10 10.59 1 12.7473 0.000378 ***
Residuals 659.37 794
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
fit.requirement
Linear mixed model fit by maximum likelihood ['lmerModLmerTest']
Formula: log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
2150.984 2188.461 -1067.492 2134.984 792
Random effects:
Groups Name Std.Dev.
phase (Intercept) 0.2442
Residual 0.9117
Number of obs: 800, groups: phase, 5
Fixed Effects:
(Intercept) log_count Q7_Q7_1 Q7_Q7_2 Q8_Q8_1 Q10
-1.007e-14 2.720e-01 -1.869e-01 1.142e-01 -6.273e-03 1.177e-01
plot(resid(m, type = "pearson") ~ fitted(m))
qqnorm(resid(m, type = "pearson"))
qqline(resid(m, type = "pearson"))
anova(fit.requirement, fit.requirement.full)
Data: df
Models:
fit.requirement: log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
fit.requirement.full: log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | phase)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
fit.requirement 8 2151.0 2188.5 -1067.5 2135.0
fit.requirement.full 11 2139.6 2191.1 -1058.8 2117.6 17.374 3 0.0005919 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1